################################################################
#####
#####   Input: Raw data from YouGov
#####   Output: Reshaped and recoded data for analysis
#####


# Move to the replication folder when it exists on this machine. On any other
# machine the hard-coded path is absent, so keep the current working directory
# and let the relative "data/..." paths below resolve against it instead of
# aborting on setwd().
project_dir <-
  "/Users/lotte/Dropbox/projects/productivity_experiment/replication"
if (dir.exists(project_dir)) {
  setwd(project_dir)
} else {
  message(
    "Replication folder not found; using current working directory: ",
    getwd()
  )
}

# Start from an empty workspace (this also removes project_dir).
rm(list = ls())

# Load libraries

library(data.table)
library(plyr)
library(dplyr)
library(tidyverse)

# Load data

# Raw export, read relative to the working directory.
productivity <- read.csv("data/experiment_results.csv")

# Drop non-consents

# Keep rows whose HSConsent is recorded and is not 2 (the value excluded
# here as non-consent). which() is used on purpose: a logical row index that
# contains NA makes `[` return an all-NA row for every NA, so a missing
# HSConsent would otherwise leave phantom empty rows in the data. Rows with a
# missing HSConsent are dropped instead.
productivity <- productivity[which(productivity$HSConsent != 2), ]


# Pair profiles

# Label each profile's gender from its first-name code (codes 1 and 2 become
# "Man", every other non-missing code "Woman"), then record the gender
# composition of the pair. A missing first-name code leaves the labels and
# the pairing as NA.
productivity <- productivity %>%
  mutate(
    male_female_first = ifelse(
      first_name_A == 1 | first_name_A == 2, "Man", "Woman"
    ),
    male_female_second = ifelse(
      first_name_B == 1 | first_name_B == 2, "Man", "Woman"
    ),
    gender_pairing = factor(
      case_when(
        male_female_first == "Man" &
          male_female_second == "Man" ~ "Man-Man",
        male_female_first == "Woman" &
          male_female_second == "Woman" ~ "Woman-Woman",
        # Mixed pairs in either order share one label.
        male_female_first != male_female_second ~ "Man-Woman"
      ),
      levels = c("Man-Man", "Woman-Woman", "Man-Woman")
    )
  )
table(productivity$gender_pairing)

# Drop variables

# Remove the existing Q2 and Q3 columns; the names are reused further down
# for the per-profile versions (Q2a/Q2b, Q3a/Q3b).
productivity <-
  productivity[setdiff(names(productivity), c("Q2", "Q3"))]

# Stack data

# Letter version of Q1: "A" when Q1 is 1, "B" when Q1 is 2, NA otherwise.
productivity <- productivity %>%
  mutate(
    Q1_2 = factor(
      case_when(Q1 == "1" ~ "A", Q1 == "2" ~ "B"),
      levels = c("A", "B")
    )
  )

# Build one row per respondent-profile. Each half keeps the respondent-level
# columns plus the columns of one profile, with the profile suffix stripped so
# that both halves share the same column names and can be stacked.
#
# The suffix pattern is anchored ("_A$" / "_B$") so only the trailing suffix
# is removed; an unanchored "_A" would strip the first "_A" found anywhere in
# a column name.

# Profile A: Q1 is 1 when the respondent's Q1 answer pointed at profile A,
# 0 when it pointed at B, NA when Q1_2 is missing.
a_cases <- productivity %>%
  select(-ends_with("_B")) %>%
  rename_at(vars(ends_with("_A")), ~ str_remove(., "_A$")) %>%
  mutate(Q1 = ifelse(Q1_2 == "A", 1, 0)) %>%
  select(-Q2b, -Q3b) %>%
  rename(Q2 = Q2a, Q3 = Q3a)

# Profile B: same construction with the roles of A and B swapped.
b_cases <- productivity %>%
  select(-ends_with("_A")) %>%
  rename_at(vars(ends_with("_B")), ~ str_remove(., "_B$")) %>%
  mutate(Q1 = ifelse(Q1_2 == "B", 1, 0)) %>%
  select(-Q2a, -Q3a) %>%
  rename(Q2 = Q2b, Q3 = Q3b)

# All profile-A rows first, followed by all profile-B rows.
cases <- rbind(a_cases, b_cases)

# Change all "99" to NA
# Applies to the two rating items only (Q2 and Q3).
cases$Q2 <- na_if(cases$Q2, 99)
cases$Q3 <- na_if(cases$Q3, 99)

# Create variables

# Profile (MP) attributes and respondent covariates, derived in one pass.
# A comparison against a missing code evaluates to NA, so missing inputs stay
# missing in every derived column.
cases <- cases %>%
  mutate(
    # Profile attributes
    mp_gender = as.factor(
      ifelse(first_name == 1 | first_name == 2, "Man", "Woman")
    ),
    mp_party = as.factor(ifelse(party == 1, "Labour", "Conservative")),

    # Turnout and 2019 vote choice; codes other than 1 and 2 become NA
    respondent_voted_2019 = factor(
      case_when(
        voted_ge_2019 == 1 ~ "TRUE",
        voted_ge_2019 == 2 ~ "FALSE"
      ),
      levels = c("FALSE", "TRUE")
    ),
    respondent_party_vote = factor(
      case_when(
        pastvote_ge_2019 == 1 ~ "Conservative",
        pastvote_ge_2019 == 2 ~ "Labour"
      ),
      levels = c("Labour", "Conservative")
    ),

    # Two-way recodes: the listed code(s) versus every other non-missing code
    respondent_brexit = ifelse(pastvote_EURef == 1, "Remain", "Leave"),
    respondent_gender = as.factor(
      ifelse(profile_gender == 1, "Man", "Woman")
    ),
    respondent_political_attention = as.integer(political_attention),
    respondent_has_degree = ifelse(
      highest_education_gce == 6, "TRUE", "FALSE"
    ),
    respondent_social_grade = ifelse(
      profile_socialgrade_cie == 1 |
        profile_socialgrade_cie == 2 |
        profile_socialgrade_cie == 3,
      "ABC1",
      "C2DE"
    ),

    # Code 8 on the left-right item is set to missing before the integer copy
    politics_scale_profile_update = na_if(
      x = politics_scale_profile_update, y = 8
    ),
    respondent_left_right = as.integer(politics_scale_profile_update)
  )

table(cases$respondent_voted_2019)
table(cases$respondent_party_vote)

# Gender congruence: "TRUE" when the profile's gender matches the
# respondent's, "FALSE" when it differs, NA when either one is missing.
cases$gender_congruence <-
  as.factor(
    ifelse(
      cases$mp_gender == "Man" & cases$respondent_gender == "Man" |
        cases$mp_gender == "Woman" &
        cases$respondent_gender == "Woman",
      "TRUE",
      "FALSE"
    )
  )

# Party congruence: "TRUE" when the profile's party equals the party the
# respondent voted for, "FALSE" when the two differ, NA when the respondent's
# party vote (or the profile's party) is missing.
#
# The Conservative branches must test mp_party. Testing mp_gender against
# "Conservative" can never be TRUE (mp_gender is only "Man"/"Woman"), which
# would leave every Conservative profile with party_congruence = NA.
cases$party_congruence <- factor(NA, levels = c("FALSE", "TRUE"))
cases$party_congruence[cases$mp_party == "Labour" &
                         cases$respondent_party_vote == "Labour" |
                         cases$mp_party == "Conservative" &
                         cases$respondent_party_vote == "Conservative"] <- "TRUE"
cases$party_congruence[cases$mp_party == "Labour" &
                         cases$respondent_party_vote == "Conservative" |
                         cases$mp_party == "Conservative" &
                         cases$respondent_party_vote == "Labour"] <- "FALSE"
table(cases$party_congruence)

# Additive performance score over the four profile attributes, plus one
# "TRUE"/"FALSE" string indicator for each score value from 0 to 4.
cases <- cases %>%
  mutate(
    objective_performance =
      sit_notsit + campaign_result + less_more + rarely_often,
    performance_0_dummy = ifelse(objective_performance == 0, "TRUE", "FALSE"),
    performance_1_dummy = ifelse(objective_performance == 1, "TRUE", "FALSE"),
    performance_2_dummy = ifelse(objective_performance == 2, "TRUE", "FALSE"),
    performance_3_dummy = ifelse(objective_performance == 3, "TRUE", "FALSE"),
    performance_4_dummy = ifelse(objective_performance == 4, "TRUE", "FALSE")
  )

# Age bands. Intervals are right-closed: (17, 34], (34, 54], (54, Inf).
# The top band is open-ended to match its "55+" label; a finite upper break
# would turn any respondent older than that break into NA. Ages of 17 or
# below fall outside every band and become NA.
cases$respondent_age <- cut(
  cases$age,
  breaks = c(17, 34, 54, Inf),
  labels = c("18-34", "35-54", "55+")
)
table(cases$respondent_age)

# Match variables to PAP
cases <- cases %>%
  mutate(
    committee_membership = sit_notsit,
    issue_campaigning = campaign_result,
    voting_legislation = less_more,
    constituency_responsiveness = rarely_often,
    electability = Q2,
    perceived_performance = Q3,
    preference = Q1
  )

# AMCE
## recode to meaningful factor names for plot
## (0 maps to the first level, 1 to the second, anything else to NA)
cases <- cases %>%
  mutate(
    committee_membership_clean = factor(
      case_when(
        committee_membership == 0 ~ "Does not sit",
        committee_membership == 1 ~ "Sits on several"
      ),
      levels = c("Does not sit", "Sits on several")
    ),
    issue_campaigning_clean = factor(
      case_when(
        issue_campaigning == 0 ~ "Unsuccessfully campaigned",
        issue_campaigning == 1 ~ "Successfully campaigned"
      ),
      levels = c("Unsuccessfully campaigned", "Successfully campaigned")
    ),
    voting_legislation_clean = factor(
      case_when(
        voting_legislation == 0 ~ "Less productive",
        voting_legislation == 1 ~ "More productive"
      ),
      levels = c("Less productive", "More productive")
    ),
    constituency_responsiveness_clean = factor(
      case_when(
        constituency_responsiveness == 0 ~ "Rarely",
        constituency_responsiveness == 1 ~ "Often"
      ),
      levels = c("Rarely", "Often")
    )
  )

table(cases$committee_membership_clean)
table(cases$issue_campaigning_clean)
table(cases$voting_legislation_clean)
table(cases$constituency_responsiveness_clean)

## overwrite the numeric attribute columns with their labelled versions
cases <- cases %>%
  mutate(
    committee_membership = committee_membership_clean,
    issue_campaigning = issue_campaigning_clean,
    voting_legislation = voting_legislation_clean,
    constituency_responsiveness = constituency_responsiveness_clean
  )

# Drop unnecessary bits

# Keep every column except the raw, intermediate and helper columns listed
# below. Names that are not present in the data (respondent_age_1 is not
# created anywhere in this script) are simply ignored.
cases <- cases[setdiff(
  names(cases),
  c(
    # survey bookkeeping and raw respondent profile fields
    "Q1_2", "starttime", "endtime", "HSConsent", "profile_GOR",
    "voted_ge_2019", "pastvote_ge_2019", "pastvote_EURef",
    "profile_gender", "profile_gross_household", "profile_marital_stat",
    "profile_socialgrade_cie", "politics_scale_profile_update",
    "highest_education_gce",
    # raw profile fields
    "surname", "first_name", "he_she", "his_her", "party", "W8",
    "text_mp1", "text_mp2", "sit_notsit", "campaign_result",
    "campaign_result_motive", "less_more", "rarely_often",
    # raw outcome items (copied to named outcome columns earlier)
    "Q2", "Q1", "Q3",
    # intermediate recodes and helpers
    "issue_campaigning_clean", "voting_legislation_clean",
    "committee_membership_clean", "constituency_responsiveness_clean",
    "respondent_age_1", "male_female_first", "male_female_second"
  )
)]

# Save clean dataset

# The object is stored under the name "productivity".
productivity <- cases
save(productivity, file = "data/productivity.Rdata")

